# Import the pandas library
import pandas as pd

# Step 2: read the CSV file
# The salary dataset is fetched over HTTP directly from its GitHub location.
csv_url = 'https://github.com/ybifoundation/Dataset/raw/main/Salary%20Data.csv'
salary = pd.read_csv(csv_url)

# Quick sanity checks on the freshly loaded frame.
print(salary.head())     # first 5 rows, to confirm the read succeeded
print(salary.columns)    # column labels, to confirm the names are as expected

# Step 3: define x and y
x = salary[['Experience Years']]  # list selector: feature table with one column
y = salary['Salary']              # target column

# Step 4: split into training and test sets
from sklearn.model_selection import train_test_split

# 70% of the rows go to training; the fixed random_state keeps the split
# the same from one run to the next.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    train_size=0.7,
    random_state=2529,
)

# Step 5: create the linear regression model and train it
from sklearn.linear_model import LinearRegression

# fit() hands back the estimator itself, so building and training can be
# written as a single chained expression.
model = LinearRegression().fit(X_train, y_train)

# Step 6: make predictions on the held-out test features
y_pred = model.predict(X_test)
print(y_pred)

# Step 7: evaluate the model
# NOTE: mean_squared_error is imported but not used in the visible script.
from sklearn.metrics import (
    mean_absolute_error,
    mean_absolute_percentage_error,
    mean_squared_error,
)

# Print the mean absolute percentage error first, then the mean absolute
# error, each comparing the test targets with the predictions.
for metric in (mean_absolute_percentage_error, mean_absolute_error):
    print(metric(y_test, y_pred))

# Step 8: visualise the results
import matplotlib.pyplot as plt

# Actual and predicted salaries are drawn against the same x values (the
# test-set experience column), so the two point sets can be compared directly.
plt.scatter(X_test, y_test, color='blue', label='Actual Salary')
plt.scatter(X_test, y_pred, color='red', label='Predicted Salary')
plt.xlabel('Years of Experience')
plt.ylabel('Salary')
plt.title('Actual vs Predicted Salary')
plt.legend()

# Save BEFORE showing: with an interactive backend plt.show() blocks until the
# window is closed and the figure is then discarded, so calling savefig()
# afterwards would write an empty image.
plt.savefig('salary_prediction.png')  # save the figure to disk
plt.show()